1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

# Libraries for decorrelation analysis, visualization, and reporting
library("FRESA.CAD")   # IDeA / UPSTM decorrelation, heatMaps, uniRankVar, FRESAScale
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
library(psych)         # fa() for exploratory factor analysis
library(whitening)
library("vioplot")
library("rpart")       # CART decision trees
# FIX: stringr was never loaded, but str_detect()/str_remove_all() are called
# later in the report section
library(stringr)

# Save the default graphics parameters so plots can restore them with par(op)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

Data Source https://archive.ics.uci.edu/ml/datasets/seeds

M. Charytanowicz, J. Niewczas, P. Kulczycki, P.A. Kowalski, S. Lukasik, S. Zak, ‘A Complete Gradient Clustering Algorithm for Features Analysis of X-ray Images’, in: Information Technologies in Biomedicine, Ewa Pietka, Jacek Kawa (eds.), Springer-Verlag, Berlin-Heidelberg, 2010, pp. 15-24.

1.2 The Data

# Load the UCI seeds dataset (tab-delimited, no header):
# 210 wheat kernels, 7 geometric features plus a cultivar label.
seeds <- read.delim("~/GitHub/LatentBiomarkers/Data/seeds_dataset.txt", header=FALSE)
par(cex=0.5)

# Descriptive column names; the last column is the class label (1/2/3)
featnames <- c("area",
               "perimeter",
               "compactness",
               "length_of_kernel",
               "width_of_kernel",
               "asymmetry_coeff",
               "length_ker_groove",
               "class"
)
colnames(seeds) <- featnames
#seeds$class <- 1*(seeds$class == 1)
# Class distribution (70 samples per cultivar)
pander::pander(table(seeds$class))
1 2 3
70 70 70

1.3 Comparing IDeA vs PCA vs EFA

1.3.1 Getting the top correlated variables

# Pairwise scatter plots of the raw features, colored by class
plot(seeds[,colnames(seeds)!="class"],col=seeds$class,cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

# Pearson correlation of the raw (pre-decorrelation) features
rawCor <- cor(seeds[,colnames(seeds)!="class"])

  gplots::heatmap.2(abs(rawCor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")




# Zero the diagonal so the maximum picks an off-diagonal (pairwise) correlation
diag(rawCor) <- 0;
getMaxcor <- apply(rawCor,2,max)
topCorrelated <- getMaxcor[which.max(getMaxcor)]
# Both members of the top-correlated pair share the same max value
whotoMax <- getMaxcor[getMaxcor == topCorrelated]
plot(seeds[,names(whotoMax)],main="Top Correlated variables")

1.3.2 IDeA

# IDeA decorrelation of the seeds data; transformed features get a "La_" prefix
IDeASeeds <- IDeA(seeds)
plot(IDeASeeds[,colnames(IDeASeeds)!="class"],col=seeds$class,cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

# The UPSTM transformation matrix attached by IDeA
pander::pander(attr(IDeASeeds,"UPSTM"))
  area La_perimeter La_length_of_kernel La_width_of_kernel La_length_ker_groove
area 1 -0.446 0.228 -0.360 -0.146
perimeter 0 1.000 -0.836 0.524 0.000
length_of_kernel 0 0.000 1.000 0.000 0.000
width_of_kernel 0 0.000 0.000 1.000 0.000
length_ker_groove 0 0.000 0.000 0.000 1.000
pander::pander(getLatentCoefficients(IDeASeeds))
  • La_perimeter:

    area perimeter
    -0.446 1
  • La_length_of_kernel:

    area perimeter length_of_kernel
    0.228 -0.836 1
  • La_width_of_kernel:

    area perimeter width_of_kernel
    -0.36 0.524 1
  • La_length_ker_groove:

    area length_ker_groove
    -0.146 1


# Correlation structure after IDeA decorrelation
IDeACor <- cor(IDeASeeds[,colnames(IDeASeeds)!="class"])


  gplots::heatmap.2(abs(IDeACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "IDeA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")



# Names of the decorrelated features ("La_" marks transformed columns)
pander::pander(colnames(IDeACor))

area, La_perimeter, compactness, La_length_of_kernel, La_width_of_kernel, asymmetry_coeff and La_length_ker_groove

# Strongest remaining pairwise correlation after decorrelation
diag(IDeACor) <- 0;
getMaxcor <- apply(IDeACor,2,max)
topCorrelated <- getMaxcor[which.max(getMaxcor)]
whotoMax <- getMaxcor[getMaxcor == topCorrelated]
plot(IDeASeeds[,names(whotoMax)],main="IDeA: Top Correlated variables")

# The originally most-correlated pair, now shown as area vs La_perimeter
plot(IDeASeeds[,c("area","La_perimeter")],main="IDeA: Top Raw Correlated variables")

1.3.3 PCA

# PCA decomposition of the features (tol drops near-zero-variance components)
featuresnames <- colnames(seeds)[colnames(seeds) != "class"]
pc <- prcomp(seeds[,featuresnames],center = TRUE,tol=0.01)   #principal components
PCAseed <- as.data.frame(cbind(predict(pc,seeds[,featuresnames]),class=seeds$class))
plot(PCAseed[,colnames(PCAseed)!="class"],col=seeds$class,cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)


# Component loadings
pander::pander(pc$rotation)
  PC1 PC2 PC3 PC4 PC5 PC6
area -0.88423 0.10081 0.2645 0.1994 -0.13717 0.28064
perimeter -0.39541 0.05649 -0.2825 -0.5788 0.57476 -0.30156
compactness -0.00431 -0.00289 0.0590 0.0578 -0.05310 -0.04523
length_of_kernel -0.12854 0.03062 -0.4001 -0.4361 -0.78700 -0.11344
width_of_kernel -0.11106 0.00237 0.3192 0.2342 -0.14480 -0.89627
asymmetry_coeff 0.12762 0.98941 0.0643 -0.0251 -0.00158 0.00329
length_ker_groove -0.12897 0.08223 -0.7619 0.6134 0.08765 -0.10992


# Correlation of the principal-component scores
PCACor <- cor(PCAseed[,colnames(PCAseed)!="class"])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.3.4 EFA

# Exploratory factor analysis (psych::fa) with varimax rotation,
# using one fewer factor than there are features
featuresnames <- colnames(seeds)[colnames(seeds) != "class"]
uls <- fa(seeds[,featuresnames],length(featuresnames)-1,rotate="varimax",warnings=FALSE)  # EFA analysis
EFAseed <- as.data.frame(cbind(predict(uls,seeds[,featuresnames]),class=seeds$class))
plot(EFAseed[,colnames(EFAseed)!="class"],col=seeds$class,cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

# Factor score weights
pander::pander(uls$weights)
  MR1 MR2 MR3 MR4 MR5 MR6
area 0.22124 0.552 -2.025 2.009 0.699 3.97e-14
perimeter 0.06513 -1.640 5.327 -2.515 3.887 -6.30e-14
compactness -0.26831 0.673 1.261 -2.093 -1.291 -9.32e-15
length_of_kernel 0.32135 -0.314 0.606 0.811 -5.108 1.22e-14
width_of_kernel 0.13570 1.124 -3.346 3.283 0.705 1.84e-14
asymmetry_coeff 0.00711 0.073 -0.261 0.171 -0.113 -8.50e-16
length_ker_groove 0.38754 0.338 -1.442 -2.401 0.474 -1.16e-15


# Correlation of the EFA factor scores
EFACor <- cor(EFAseed[,colnames(EFAseed)!="class"])


  gplots::heatmap.2(abs(EFACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "EFA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.4 Effect on CART modeling

# Depth-3 CART tree on the raw (correlated) features
par(op)
par(xpd = TRUE)
seeds$class <- factor(seeds$class)
rawmodel <- rpart(class~.,seeds,control=rpart.control(maxdepth=3))
plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(rawmodel, use.n = TRUE,cex=0.75)

# Resubstitution confusion matrix and accuracy
pr <- predict(rawmodel,seeds,type = "class")
pander::pander(table(seeds$class,pr))
1 2 3
62 1 7
2 68 0
1 0 69
pander::pander(c(accuracy=sum(seeds$class==pr)/nrow(seeds)))
accuracy
0.948

# Depth-3 CART tree on the IDeA-decorrelated features
IDeASeeds$class <- factor(IDeASeeds$class)
IDeAmodel <- rpart(class~.,IDeASeeds,control=rpart.control(maxdepth=3))
plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(IDeAmodel, use.n = TRUE,cex=0.75)

# Resubstitution confusion matrix and accuracy
pr <- predict(IDeAmodel,IDeASeeds,type = "class")
pander::pander(table(IDeASeeds$class,pr))
1 2 3
62 4 4
2 68 0
3 0 67
pander::pander(c(accuracy=sum(IDeASeeds$class==pr)/nrow(seeds)))
accuracy
0.938


# Depth-3 CART tree on the principal-component scores
PCAseed$class <- factor(PCAseed$class)
PCAmodel <- rpart(class~.,PCAseed,control=rpart.control(maxdepth=3))
plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(PCAmodel, use.n = TRUE,cex=0.75)

# Resubstitution confusion matrix and accuracy
pr <- predict(PCAmodel,PCAseed,type = "class")
pander::pander(table(PCAseed$class,pr))
1 2 3
62 2 6
6 64 0
5 0 65
pander::pander(c(accuracy=sum(PCAseed$class==pr)/nrow(seeds)))
accuracy
0.91


# Depth-3 CART tree on the EFA factor scores
EFAseed$class <- factor(EFAseed$class)
EFAmodel <- rpart(class~.,EFAseed,control=rpart.control(maxdepth=3))
plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(EFAmodel, use.n = TRUE,cex=0.75)

# Resubstitution confusion matrix and accuracy
pr <- predict(EFAmodel,EFAseed,type = "class")
pander::pander(table(EFAseed$class,pr))
1 2 3
61 1 8
0 67 3
1 0 69
pander::pander(c(accuracy=sum(EFAseed$class==pr)/nrow(seeds)))
accuracy
0.938

par(op)

1.5 Effect on logistic modeling

# Recode the outcome as binary: cultivar 1 vs the rest.
# NOTE(review): seeds$class is a factor at this point (set in the CART section),
# so the comparison is against the level "1" — confirm this is intended.
seeds$class <- 1*(seeds$class == 1)
IDeASeeds$class <- seeds$class
PCAseed$class <- seeds$class
EFAseed$class <- seeds$class
# Logistic regression on the raw features
rawmodel <- glm(class~.,seeds,family="binomial")
pander::pander(summary(rawmodel))
  Estimate Std. Error z value Pr(>|z|)
(Intercept) -766.71 256.573 -2.988 0.00281
area -22.49 7.541 -2.983 0.00285
perimeter 43.63 17.229 2.533 0.01132
compactness 507.47 184.068 2.757 0.00583
length_of_kernel 50.07 19.912 2.514 0.01192
width_of_kernel -16.23 21.499 -0.755 0.45038
asymmetry_coeff -1.95 0.644 -3.037 0.00239
length_ker_groove -37.40 12.993 -2.878 0.00400

(Dispersion parameter for binomial family taken to be 1 )

Null deviance: 267.3 on 209 degrees of freedom
Residual deviance: 26.3 on 202 degrees of freedom
plot(rawmodel)


# Logistic regression on the IDeA-decorrelated features
IDeAmodel <- glm(class~.,IDeASeeds,family="binomial")
pander::pander(summary(IDeAmodel))
  Estimate Std. Error z value Pr(>|z|)
(Intercept) -766.71 256.573 -2.988 0.00281
area -3.28 1.091 -3.005 0.00266
La_perimeter 94.00 30.924 3.040 0.00237
compactness 507.47 184.068 2.757 0.00583
La_length_of_kernel 50.07 19.912 2.514 0.01192
La_width_of_kernel -16.23 21.499 -0.755 0.45038
asymmetry_coeff -1.95 0.644 -3.037 0.00239
La_length_ker_groove -37.40 12.993 -2.878 0.00400

(Dispersion parameter for binomial family taken to be 1 )

Null deviance: 267.3 on 209 degrees of freedom
Residual deviance: 26.3 on 202 degrees of freedom
plot(IDeAmodel)



# Logistic regression on the principal-component scores
PCAmodel <- glm(class~.,PCAseed,family="binomial")
pander::pander(summary(PCAmodel))
  Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.49 0.659 -3.78 1.54e-04
PC1 0.42 0.168 2.50 1.25e-02
PC2 -3.49 0.789 -4.42 9.73e-06
PC3 13.44 3.173 4.23 2.29e-05
PC4 -40.30 9.321 -4.32 1.54e-05
PC5 -34.43 12.779 -2.69 7.06e-03
PC6 -26.49 12.911 -2.05 4.02e-02

(Dispersion parameter for binomial family taken to be 1 )

Null deviance: 267 on 209 degrees of freedom
Residual deviance: 44 on 203 degrees of freedom
plot(PCAmodel)



# Logistic regression on the EFA factor scores
EFAmodel <- glm(class~.,EFAseed,family="binomial")
pander::pander(summary(EFAmodel))
  Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.47e+00 6.24e-01 -3.963 7.40e-05
MR1 -2.62e+00 6.62e-01 -3.961 7.46e-05
MR2 7.39e-01 5.15e-01 1.434 1.52e-01
MR3 6.14e+00 1.40e+00 4.390 1.13e-05
MR4 2.14e+00 6.93e-01 3.087 2.02e-03
MR5 -1.70e+00 6.54e-01 -2.607 9.15e-03
MR6 8.73e+13 2.13e+14 0.409 6.82e-01

(Dispersion parameter for binomial family taken to be 1 )

Null deviance: 267.3 on 209 degrees of freedom
Residual deviance: 47.7 on 203 degrees of freedom
plot(EFAmodel)


par(op)

1.5.0.1 Standardize the names for the reporting

# Report parameters: study label, analysis data frame, outcome column,
# IDeA correlation threshold, number of top features to report, heatmap text size
studyName <- "Seeds"
dataframe <- seeds
outcome <- "class"
thro <- 0.80
TopVariables <- 5
cexheat = 0.45

1.6 Generating the report

1.6.1 Libraries

Some libraries

# Libraries required by the standardized report section
library(psych)
library(whitening)
library("vioplot")

1.6.2 Data specs

# Dataset dimensions (columns exclude the outcome)
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
210 7
# Outcome distribution
pander::pander(table(dataframe[,outcome]))
0 1
140 70

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

# Skip the expensive per-sample heatmaps when there are too many features
largeSet <- length(varlist) > 1000

1.6.3 Scaling the data

Scaling the data, then removing near-zero-variance columns and highly co-linear (r > 0.99999) columns


  ### Some global cleaning
  # Keep only columns with non-zero variance.
  # NOTE(review): despite its name, `sdiszero` is TRUE when the sd is NOT zero.
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  # Drop features that are almost perfectly co-linear (r > 0.99999)
  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]


# Rank-based scaling via FRESA.CAD's "OrderLogit" method
dataframe <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.7 The heatmap of the data


# Sample-by-feature heatmap of the scaled data (skipped for very wide data)
if (!largeSet)
{
  
  hm <- heatMaps(data=dataframe,
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.7.0.1 Correlation Matrix of the Data

The heat map of the data


# Pearson correlation heatmap of the raw features; prints the largest
# off-diagonal absolute correlation at the end
if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")
  # Zero the diagonal so max() reports the strongest pairwise correlation
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9944521

1.8 The decorrelation


# IDeA decorrelation with the chosen correlation threshold (thro = 0.80)
DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 7 , Uni p: 0.03711537 , Uncorrelated Base: 3 , Outcome-Driven Size: 0 , Base Size: 3 
#> 
#> 
 1 <R=0.994,w=  1,N=    3>, Top: 1( 1 )[ 1 : 1 : 0.972 ]( 1 , 1 , 0 ),<|>Tot Used: 2 , Added: 1 , Zero Std: 0 , Max Cor: 0.972
#> 
 2 <R=0.972,w=  2,N=    3>, Top: 1( 2 )[ 1 : 1 : 0.936 ]( 1 , 2 , 1 ),<|>Tot Used: 4 , Added: 2 , Zero Std: 0 , Max Cor: 0.862
#> 
 3 <R=0.862,w=  3,N=    4>, Top: 2( 1 )[ 1 : 2 : 0.831 ]( 2 , 2 , 1 ),<|>Tot Used: 5 , Added: 2 , Zero Std: 0 , Max Cor: 0.809
#> 
 4 <R=0.809,w=  4,N=    2>, Top: 1( 1 )[ 1 : 1 : 0.800 ]( 1 , 1 , 2 ),<|>Tot Used: 5 , Added: 1 , Zero Std: 0 , Max Cor: 0.715
#> 
 5 <R=0.000,w=  5,N=    0>
#> 
 [ 5 ], 0.5987399 Decor Dimension: 5 . Cor to Base: 4 , ABase: 1 , Outcome Base: 0 
#> 
# Feature list after decorrelation (excludes the outcome)
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

# Total variance before and after decorrelation
pander::pander(sum(apply(dataframe[,varlist],2,var)))

4.75

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

2.48

# Empirical entropy (256-bin discretization) before and after decorrelation
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

5.01

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

4.55

1.8.1 The decorrelation matrix


# Visualize which input features contribute to each decorrelated output
if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  # Sparse transformation matrix attached by IDeA
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  # Binary map of the nonzero coefficients
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.9 The heatmap of the decorrelated data

# Sample-by-feature heatmap of the decorrelated data
if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe,
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.10 The correlation matrix after decorrelation

# Residual correlation after IDeA; prints the strongest remaining pair
if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  # Zero the diagonal before taking the maximum off-diagonal correlation
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.714948

1.11 U-MAP Visualization of features

1.11.1 The UMAP of the Raw Data

# One colour per outcome class
classes <- unique(dataframe[,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
# 2-D UMAP embedding of the scaled raw features
datasetframe.umap = umap(scale(dataframe[,varlist]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
# NOTE(review): indexing raincolors by outcome+1 assumes the outcome is coded
# 0/1; confirm this matches the names assigned above
text(datasetframe.umap$layout,labels=dataframe[,outcome],col=raincolors[dataframe[,outcome]+1])

1.11.2 The decorrelated UMAP


# 2-D UMAP embedding of the decorrelated features, same colour scheme
datasetframe.umap = umap(scale(DEdataframe[,varlistc]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[,outcome],col=raincolors[DEdataframe[,outcome]+1])

1.12 Univariate Analysis

1.12.1 Univariate



# Univariate AUC ranking of the raw features against the binary outcome
univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")



# Univariate AUC ranking of the decorrelated features.
# FIX: removed the trailing comma after rankingTest="AUC"; an empty trailing
# argument ("argument is empty") is an error in an R function call.
univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC")

1.12.2 Final Table


# Columns reported for each feature in the univariate tables
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

## Logical mask selecting the TopVariables highest-ranked features.
# FIX: seq_along() is safe when varlist is empty, unlike c(1:length(varlist)).
topvar <- seq_along(varlist) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
asymmetry_coeff -0.5636 0.690 0.348 0.800 9.86e-01 0.810
length_ker_groove -0.2900 0.421 0.464 0.781 2.62e-03 0.764
compactness 0.2829 0.685 -0.273 1.041 5.53e-01 0.653
length_of_kernel -0.0626 0.430 0.257 0.919 3.80e-04 0.562
perimeter -0.0320 0.341 0.192 0.888 1.91e-05 0.524


# Latent ("La_") features merged with the overall top-ranked features.
# FIX: use base-R grepl() instead of stringr::str_detect(); stringr is never
# loaded by this document ("La_" has no regex metacharacters, so fixed=TRUE
# matching is equivalent).
topLAvar <- univarDe$orderframe$Name[grepl("La_",univarDe$orderframe$Name,fixed=TRUE)]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]

# Names of the latent variables kept in the final table
theLaVar <- rownames(finalTable)[grepl("La_",rownames(finalTable),fixed=TRUE)]

pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
La_length_ker_groove -0.27554 0.293 0.3092 0.266 0.851 0.929
asymmetry_coeff -0.56364 0.690 0.3484 0.800 0.986 0.810
La_width_of_kernel -0.00490 0.103 -0.1023 0.101 0.850 0.749
compactness 0.28287 0.685 -0.2733 1.041 0.553 0.653
La_length_of_kernel -0.00498 0.108 0.0363 0.142 0.469 0.607

# Coefficients of each latent feature and the f-scores attached by IDeA
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")

# Raw features participating in the significant latent variables
theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))


# One formula string per reported feature, filled in below
theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)

# Average coefficients per latent variable and the latent fraction of features
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.5 4 0.571


allSigvars <- names(dc)



# Build a human-readable linear-combination string for each latent feature in
# finalTable: every coefficient other than the feature's own unit term is
# appended as "+/- coeff*name" to the matching entry of deFromula.
# FIX: removed the dead seed assignment `dx <- names(deFromula)[1]`; the
# for-loop immediately overwrites dx.
for (dx in names(deFromula))
{
  coef <- theFormulas[[dx]]
  cname <- names(theFormulas[[dx]])
  names(cname) <- cname
  for (cf in names(coef))
  {
    if (cf != dx)
    {
      # Positive coefficients get an explicit "+ " prefix; negative ones
      # already carry their minus sign from sprintf
      if (coef[cf]>0)
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
      }
      else
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("%5.3f*%s",coef[cf],cname[cf]))
      }
    }
  }
}

# Append the raw constituents of the significant latent variables that are not
# already present in the table
finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])


# Map each (possibly latent) feature back to its raw name to look up the raw AUC.
# FIX: use base-R gsub() instead of stringr::str_remove_all(); stringr is never
# loaded by this document.
orgnamez <- rownames(finalTable)
orgnamez <- gsub("La_","",orgnamez,fixed=TRUE)
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

# Final report, sorted by decorrelated AUC (descending)
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
La_length_ker_groove -0.873area + 1.000length_ker_groove -0.27554 0.293 0.3092 0.266 8.51e-01 0.929 0.764 -1
asymmetry_coeff -0.56364 0.690 0.3484 0.800 9.86e-01 0.810 0.810 NA
length_ker_groove NA -0.28998 0.421 0.4639 0.781 2.62e-03 0.764 0.764 NA
La_width_of_kernel -2.936area + 1.922perimeter + 1.000*width_of_kernel -0.00490 0.103 -0.1023 0.101 8.50e-01 0.749 0.501 -2
compactness 0.28287 0.685 -0.2733 1.041 5.53e-01 0.653 0.653 NA
La_length_of_kernel + 1.602area -2.629perimeter + 1.000*length_of_kernel -0.00498 0.108 0.0363 0.142 4.69e-01 0.607 0.562 -2
length_of_kernel NA -0.06258 0.430 0.2567 0.919 3.80e-04 0.562 0.562 NA
perimeter NA -0.03199 0.341 0.1918 0.888 1.91e-05 0.524 0.524 NA
area NA -0.01654 0.324 0.1772 0.896 1.24e-05 0.515 0.515 4
width_of_kernel NA 0.00803 0.385 0.0493 0.947 2.79e-03 0.501 0.501 NA